## Reproduction script for:
## Mallinson, Daniel J. 2015. "Building a Better Speed Trap: Measuring Policy
## Adoption Speed in the American States." State Politics & Policy Quarterly.

rm(list = ls(all = TRUE))

########################################################################################
######################### Create speed measures ########################################
########################################################################################

library(foreign)

#Read in adoption dataset
allpolicy <- read.csv("speed_trap_adopts.csv")
data <- allpolicy

all.states <- sort(unique(data$state))  # Pull out unique state names
policyallnames <- colnames(data[,7:180])  # Pull out unique policy names

#Create survival dataset using original adoptions data; creates column of ones and zeros 
#for adoption indicator and column with duration time

stateyear <- data[c(1:5)] # Starter dataset with state indicators

for(i in 7:180){
  policy <- data.frame(data[c(i)])
  policy.min <- min(policy, na.rm=TRUE)
  policy.ind <- as.data.frame(matrix(NA, nrow=length(all.states), ncol=1))
  names(policy.ind) <- paste(colnames(policy),".ind", sep="")
  policy.ind[policy!="NA"] = 1
  policy.ind <- replace(policy.ind, is.na(policy.ind), 0)
  policy.dur <- policy
  names(policy.dur) <- paste(colnames(policy),".dur",sep="")
  policy.dur = policy.dur-(min(policy.dur, na.rm=TRUE)-1)
  policy.dur <- replace(policy.dur, is.na(policy.dur), (max(policy, na.rm=TRUE))-(min(policy, na.rm=TRUE)-1)) #Changed from max+1 to max
  if(i==7){
    finaldata <- cbind(stateyear, policy, policy.ind, policy.dur)
  }
  else{
    finaldata <- cbind(finaldata, policy, policy.ind, policy.dur)
  }
}

#Pull out the first year of adoption for each policy
for(i in 7:180){
  policy <- data.frame(data[c(i)])
  policy.min <- min(policy, na.rm=TRUE)
  if(i==7){
    firstadopt <- policy.min
  }
  else{
    firstadopt <- rbind(firstadopt, policy.min)
  }
}

#Pull out the last year of adoption for each policy
for(i in 7:180){
  policy <- data.frame(data[c(i)])
  policy.max <- max(policy, na.rm=TRUE)
  if(i==7){
    lastadopt <- policy.max
  }
  else{
    lastadopt <- rbind(lastadopt, policy.max)
  }
}

#Create a two column dataframe with first and last adoption
totalyears <- lastadopt-firstadopt
firstadopt <- as.data.frame(firstadopt)
row.names(firstadopt) <- NULL
names(firstadopt) <- "first"
lastadopt <- as.data.frame(lastadopt)
row.names(lastadopt) <- NULL
names(lastadopt) <- "last"
totalyears <- as.data.frame(totalyears)
row.names(totalyears) <- NULL
names(totalyears) <- "totalyears"
adoptyears <- cbind(firstadopt, lastadopt, totalyears)

##Calculate the total number of adopting states
for(i in 7:180){
  policy <- data.frame(data[c(i)])
  policy.sum <- sum(complete.cases(policy))
  if(i==7){
    totalstate <- policy.sum
  }
  else{
    totalstate <- rbind(totalstate, policy.sum)
  }
}

totalstate <- as.data.frame(totalstate)
names(totalstate) <- "totalstates"

############## Calculate N-C 2009 Indicator ###################

#The dependent variable used here is a dichotomous measure coded 1 for those policies where over 50% of adopters do so within the first third of the temporal distribution and 0 otherwise. (Nicholson-Crotty, Sean. 2009. "The Politics of Diffusion: Public Policy in the American States. Journal of Politics. 71(1): 192-205). Page 197.

#Instead of determining the trail off in adoptions, I use the entire span of adoptions to calculate Nicholson-Crotty's speed measure. Essentially, I measure the total span of time for observed adoptions and calculate one-third of it. I then measure the percentage of adopters that did so before that one-third cut off. If more than 50 percent adopted in that first third of the adoption time, they are marked as fast adopters. 

max <- length(finaldata)-2

sequence <- seq(6,max,3)

nc_fast <- rep(0, 174)
percent.below.cut <- rep(0,174)

j <- 0

for(i in sequence){
	usedata <- cbind(finaldata[,i], finaldata[,i+1], finaldata[,i+2])
	usedata <- usedata[which(usedata[,2] == 1),]
	dur <- max(usedata[,3])
	cut <- round(.33*dur, digits=0)
	percent <- sum(as.numeric(usedata[,3] <= cut))/nrow(usedata)
      j <- j+1	
      percent.below.cut[j] <- percent*100 #Record percentage of adoptions below .33 cut off
	if((percent >= .5) == TRUE){nc_fast[j] <- 1}
}

###############################################################
############# Calculate speed measures for ####################
#############   cross-sectional analysis   ####################
###############################################################

#install.packages("eha")
library(eha)

################# weibull #####################

max <- length(finaldata)-2

sequence <- seq(6,max,3)

for(i in sequence){
  output <- survreg(Surv(time=finaldata[,i+2], event=finaldata[,i+1], type="right")~1, dist="weibull", data=finaldata)
  if(i==6){
    const <- as.data.frame(output$coefficients)
    logp <- as.data.frame(log(1/output$scale))
    se <- as.matrix(sqrt(diag(output$var)))
    se.const <- as.data.frame(se[1,1])
    se.logp <- as.data.frame(se[2,1])
    names(const) <- paste(colnames(finaldata[i]))
    names(logp) <- paste(colnames(finaldata[i]))
    names(se.const) <- paste(colnames(finaldata[i]))
    names(se.logp) <- paste(colnames(finaldata[i]))
    const.history <- const
    logp.history <- logp
    se.const.history <- se.const
    se.logp.history <- se.logp
  }
  else{
  const <- as.data.frame(output$coefficients)
  logp <- as.data.frame(log(1/output$scale))
  se <- as.matrix(sqrt(diag(output$var)))
  se.const <- as.data.frame(se[1,1])
  se.logp <- as.data.frame(se[2,1])
  names(const) <- paste(colnames(finaldata[i]))
  names(logp) <- paste(colnames(finaldata[i]))
  names(se.const) <- paste(colnames(finaldata[i]))
  names(se.logp) <- paste(colnames(finaldata[i]))
  const.history <- cbind(const.history, const)
  logp.history <- cbind(logp.history, logp)
  se.const.history <- cbind(se.const.history, se.const)
  se.logp.history <- cbind(se.logp.history, se.logp)
  }
}

const.history <- as.data.frame(t(const.history))
names(const.history) <- "weib.speed"
se.const.history <- as.data.frame(t(se.const.history))
names(se.const.history) <- "weib.speed.se"
logp.history <- as.data.frame(t(logp.history))
names(logp.history) <- "weib.scale"
se.logp.history <- as.data.frame(t(se.logp.history))
names(se.logp.history) <- "weib.scale.se"

policyallnames <- as.data.frame(policyallnames)
names(policyallnames) <- "policy"

speeddata <- cbind(policyallnames, adoptyears, totalstate, const.history, se.const.history, logp.history, se.logp.history)


################# exponential #####################

max <- length(finaldata)-2

sequence <- seq(6,max,3)

for(i in sequence){
  output <- survreg(Surv(time=finaldata[,i+2], event=finaldata[,i+1], type="right")~1, dist="exponential", data=finaldata)
  if(i==6){
    const <- as.data.frame(output$coefficients)
    se <- as.matrix(sqrt(diag(output$var)))
    se.const <- as.data.frame(se[1,1])
    names(const) <- paste(colnames(finaldata[i]))
    names(se.const) <- paste(colnames(finaldata[i]))
    const.history <- const
    se.const.history <- se.const
  }
  else{
  const <- as.data.frame(output$coefficients)
  se <- as.matrix(sqrt(diag(output$var)))
  se.const <- as.data.frame(se[1,1])
  names(const) <- paste(colnames(finaldata[i]))
  names(se.const) <- paste(colnames(finaldata[i]))
  const.history <- cbind(const.history, const)
  se.const.history <- cbind(se.const.history, se.const)
  }
}

const.history <- as.data.frame(t(const.history))
names(const.history) <- "exp.speed"
se.const.history <- as.data.frame(t(se.const.history))
names(se.const.history) <- "exp.speed.se"


speeddata <- cbind(speeddata, const.history, se.const.history)

################# lognormal #####################

max <- length(finaldata)-2

sequence <- seq(6,max,3)

for(i in sequence){
  output <- survreg(Surv(time=finaldata[,i+2], event=finaldata[,i+1], type="right")~1, dist="lognormal", data=finaldata)
  if(i==6){
    const <- as.data.frame(output$coefficients)
    logp <- as.data.frame(log(1/output$scale))
    se <- as.matrix(sqrt(diag(output$var)))
    se.const <- as.data.frame(se[1,1])
    se.logp <- as.data.frame(se[2,1])
    names(const) <- paste(colnames(finaldata[i]))
    names(logp) <- paste(colnames(finaldata[i]))
    names(se.const) <- paste(colnames(finaldata[i]))
    names(se.logp) <- paste(colnames(finaldata[i]))
    const.history <- const
    logp.history <- logp
    se.const.history <- se.const
    se.logp.history <- se.logp
  }
  else{
  const <- as.data.frame(output$coefficients)
  logp <- as.data.frame(log(1/output$scale))
  se <- as.matrix(sqrt(diag(output$var)))
  se.const <- as.data.frame(se[1,1])
  se.logp <- as.data.frame(se[2,1])
  names(const) <- paste(colnames(finaldata[i]))
  names(logp) <- paste(colnames(finaldata[i]))
  names(se.const) <- paste(colnames(finaldata[i]))
  names(se.logp) <- paste(colnames(finaldata[i]))
  const.history <- cbind(const.history, const)
  logp.history <- cbind(logp.history, logp)
  se.const.history <- cbind(se.const.history, se.const)
  se.logp.history <- cbind(se.logp.history, se.logp)
  }
}

const.history <- as.data.frame(t(const.history))
names(const.history) <- "lognorm.speed"
se.const.history <- as.data.frame(t(se.const.history))
names(se.const.history) <- "lognorm.speed.se"
logp.history <- as.data.frame(t(logp.history))
names(logp.history) <- "lognorm.scale"
se.logp.history <- as.data.frame(t(se.logp.history))
names(se.logp.history) <- "lognorm.scale.se"


speeddata <- cbind(speeddata, const.history, se.const.history, logp.history, se.logp.history)


################# loglogistic #####################

max <- length(finaldata)-2

sequence <- seq(6,max,3)

for(i in sequence){
  output <- survreg(Surv(time=finaldata[,i+2], event=finaldata[,i+1], type="right")~1, dist="loglogistic", data=finaldata)
  if(i==6){
    const <- as.data.frame(output$coefficients)
    logp <- as.data.frame(log(1/output$scale))
    se <- as.matrix(sqrt(diag(output$var)))
    se.const <- as.data.frame(se[1,1])
    se.logp <- as.data.frame(se[2,1])
    names(const) <- paste(colnames(finaldata[i]))
    names(logp) <- paste(colnames(finaldata[i]))
    names(se.const) <- paste(colnames(finaldata[i]))
    names(se.logp) <- paste(colnames(finaldata[i]))
    const.history <- const
    logp.history <- logp
    se.const.history <- se.const
    se.logp.history <- se.logp
  }
  else{
  const <- as.data.frame(output$coefficients)
  logp <- as.data.frame(log(1/output$scale))
  se <- as.matrix(sqrt(diag(output$var)))
  se.const <- as.data.frame(se[1,1])
  se.logp <- as.data.frame(se[2,1])
  names(const) <- paste(colnames(finaldata[i]))
  names(logp) <- paste(colnames(finaldata[i]))
  names(se.const) <- paste(colnames(finaldata[i]))
  names(se.logp) <- paste(colnames(finaldata[i]))
  const.history <- cbind(const.history, const)
  logp.history <- cbind(logp.history, logp)
  se.const.history <- cbind(se.const.history, se.const)
  se.logp.history <- cbind(se.logp.history, se.logp)
  }
}

const.history <- as.data.frame(t(const.history))
names(const.history) <- "loglog.speed"
se.const.history <- as.data.frame(t(se.const.history))
names(se.const.history) <- "loglog.speed.se"
logp.history <- as.data.frame(t(logp.history))
names(logp.history) <- "loglog.scale"
se.logp.history <- as.data.frame(t(se.logp.history))
names(se.logp.history) <- "loglog.scale.se"


speeddata <- cbind(speeddata, const.history, se.const.history, logp.history, se.logp.history)

#Export Raw speed coefficients
write.dta(speeddata, file="all_raw_speed.dta")
write.csv(speeddata, file="all_raw_speed.csv")

##Rescale speed coefficient to range from zero to one
#First, subtract all speed values from 1 so they range from slowest to fastest

weib.rescale.speed <- 1-speeddata$weib.speed
weib.rescale.speed <- as.data.frame((weib.rescale.speed - min(weib.rescale.speed))/(max(weib.rescale.speed) - min(weib.rescale.speed)))
names(weib.rescale.speed) <- "weib.rescale.speed"

exp.rescale.speed <- 1-speeddata$exp.speed
exp.rescale.speed <- as.data.frame((exp.rescale.speed - min(exp.rescale.speed))/(max(exp.rescale.speed) - min(exp.rescale.speed)))
names(exp.rescale.speed) <- "exp.rescale.speed"

lognorm.rescale.speed <- 1-speeddata$lognorm.speed
lognorm.rescale.speed <- as.data.frame((lognorm.rescale.speed - min(lognorm.rescale.speed))/(max(lognorm.rescale.speed) - min(lognorm.rescale.speed)))
names(lognorm.rescale.speed) <- "lognorm.rescale.speed"

loglog.rescale.speed <- 1-speeddata$loglog.speed
loglog.rescale.speed <- as.data.frame((loglog.rescale.speed - min(loglog.rescale.speed))/(max(loglog.rescale.speed) - min(loglog.rescale.speed)))
names(loglog.rescale.speed) <- "loglog.rescale.speed"

speeddata <- cbind(speeddata, weib.rescale.speed, exp.rescale.speed, lognorm.rescale.speed, loglog.rescale.speed)

#Add Nicholson-Crotty 2009 Speed Measure

speeddata <- cbind(speeddata, nc_fast, percent.below.cut)


########## Fix errors in data ########

rownames(speeddata) <- NULL #Erase unnecessary row names

speeddata$policy <- as.character(speeddata$policy) #Convert factor to character

#Fix errors carried over from data management in STATA
speeddata$policy[which(speeddata$policy=="cocchwy")] <- "conacchwy"
speeddata$policy[which(speeddata$policy=="treso")] <- "natreso"
speeddata$policy[which(speeddata$policy=="postd")] <- "postdna"
speeddata$policy[which(speeddata$policy=="retaig")] <- "retainag"

#Delete two duplicated policies
speeddata <- speeddata[which(speeddata$policy!="contrains"),]
speeddata <- speeddata[which(speeddata$policy!="medmar"),]


##################### Export speed coefficients and adoptions #######################

write.csv(speeddata, "allspeedcoefs.csv", row.names=FALSE) # Export speed coefficients to csv
write.dta(speeddata, "allspeedcoefs.dta") # Export speed coefficients to dta

##############################################################################
############################ Pooled Weibull ##################################
##############################################################################


library(foreign)
library(eha)

#Read in adoption dataset
allpolicy <- read.csv("speed_trap_adopts.csv")
data <- allpolicy

all.states <- sort(unique(data$state))  # Pull out unique state names
policyallnames <- colnames(data[,7:180])  # Pull out unique policy names

#Create survival dataset using original adoptions data; creates column of ones and zeros 
#for adoption indicator and column with duration time

stateyear <- data[c(1:5)] # Starter dataset with state indicators

for(i in 7:180){
  policy <- data.frame(data[c(i)])
  policy.min <- min(policy, na.rm=TRUE)
  policy.max <- max(policy, na.rm=TRUE)
  policy.ind <- as.data.frame(matrix(NA, nrow=length(all.states), ncol=1))
  names(policy.ind) <- paste(colnames(policy),".ind", sep="")
  policy.ind[policy!="NA"] = 1
  policy.ind <- replace(policy.ind, is.na(policy.ind), 0)
  policy.dur <- policy
  names(policy.dur) <- paste(colnames(policy),".dur",sep="")
  policy.dur = policy.dur-(min(policy.dur, na.rm=TRUE)-1)
  policy.dur <- replace(policy.dur, is.na(policy.dur), (max(policy, na.rm=TRUE))-(min(policy, na.rm=TRUE)-1)) #Changed from max+1 to max
  policy <- replace(policy, is.na(policy), max(policy, na.rm=TRUE))
  policydata <- cbind(stateyear, policy, policy.ind, policy.dur)
  names(policydata) <- c("state", "state_no", "state_m","state_fips", "state_icpsr", "year", "adopt_indicator", "duration")
  policydata$policy <- colnames(policy)
  policydata$iteration <- i
  if(i==7){
    weib.finaldata <- policydata
  }
  else{
    weib.finaldata <- rbind(weib.finaldata, policydata)
  }
}

########## Fix errors in data ########

weib.finaldata$policy <- as.character(weib.finaldata$policy) #Convert factor to character

#Fix errors carried over from data management in STATA
weib.finaldata$policy[which(weib.finaldata$policy=="cocchwy")] <- "conacchwy"
weib.finaldata$policy[which(weib.finaldata$policy=="treso")] <- "natreso"
weib.finaldata$policy[which(weib.finaldata$policy=="postd")] <- "postdna"
weib.finaldata$policy[which(weib.finaldata$policy=="retaig")] <- "retainag"

#Delete two duplicated policies
weib.finaldata<- weib.finaldata[which(weib.finaldata$policy!="contrains"),]
weib.finaldata<- weib.finaldata[which(weib.finaldata$policy!="medmar"),]


################ Export Pooled Weibull Data #######################


write.csv(weib.finaldata, "pooled_weibull_raw.csv")
write.dta(weib.finaldata, "pooled_weibull_raw.dta")





#####################################################################################
######################## Analysis for manuscript ####################################
#####################################################################################

################### Import Speed Coefficients and covariates ########################

library(foreign)

speeddata <- read.csv("speedanalysis_merged.csv") #This dataset includes speed measures and covariates

data <- subset(speeddata, first>1945)  #Cut observations prior to 1946

## Put zeros in for missing salience and mip
data[is.na(data)] <- 0

#Run descriptive statistics for each variable in model (Appendix 1)
library(psych)
myvars <- c("weib.rescale.speed", "federal", "healthinsurance", "complexity_topic", "mip", "salience")
descriptive <- data[myvars]
describe(descriptive, skew=FALSE, ranges=FALSE)
library(xtable)
xtable(describe(descriptive))

#################################################################################
######################### Initial Analysis of Speed Measure #####################
#################################################################################

### Correlations between speed measures ###
#All are compared with the chosen Weibull

cor.test(speeddata$weib.rescale.speed, speeddata$exp.rescale.speed) # r(172) = 0.96, p < 0.001
cor.test(speeddata$weib.rescale.speed, speeddata$lognorm.rescale.speed) # r(172) = 0.98, p < 0.001
cor.test(speeddata$weib.rescale.speed, speeddata$loglog.rescale.speed) # r(172) = 0.98, p < 0.001


##### Plot cumulative adoptions for fast, slow, and mid policy (Figure 1) #######
curve.data <- read.csv("curve_data.csv")

slow.data <- curve.data[,1:3]
slow.data <- na.omit(slow.data)

fast.data <- curve.data[,4:6]
fast.data <- na.omit(fast.data)

mid.data <- curve.data[,8:9]
mid.data <- na.omit(mid.data)

#jpeg("cumulative_adopts_plot.jpg", width=5, height=5, units="in", res=600)
pdf("cumulative_adopts_plot.pdf", width=11, height=8.5)
plot(slow.data$earlvot_states~slow.data$earlvot_time, xlab="", ylab="", pch=20, ylim=c(0,50), xlim=c(0,35), col="black", axes=FALSE, cex=2)
lines(slow.data$earlvot_states~slow.data$earlvot_time, col="black", lwd=2)
text(21,16, "Early Voting (0.23)", pos=4, cex=1, col="black", font=2)
points(fast.data$oldagea_states~fast.data$oldagea_time, pch=15, cex=2, col="black")
lines(fast.data$oldagea_states~fast.data$oldagea_time, lwd=2, col="black")
text(4, 48, "Old Age Assistance (1)", pos=4, cex=1, col="black", font=2)
points(mid.data$juvisup_states~mid.data$juvisup_time, pch=18, col="black", cex=2)
lines(mid.data$juvisup_states~mid.data$juvisup_time, lwd=2, col="black")
text(16, 40, "Interstate Compact on Juveniles (0.58)", pos=4, cex=1, col="black", font=2)
axis(1, at=c(0,5,10,15,20,25,30,35), labels=c("0","5","10","15","20","25","30","35") )
axis(2, at=c(0,10,20,30,40,50), labels=c("0","10","20","30","40","50"))
title(main="", xlab="Number of Years Since First Adoption", ylab="Cumulative Number of Adopting States", font.lab=2, cex=2)
dev.off()


###### Plot comparison of speed coefficients and adoption times (Figure 2)  #######

fast <- speeddata[which(speeddata$nc_fast==1),]
slow <- speeddata[which(speeddata$nc_fast==0),]

#slowest policy = bottle

fast_correct <- fast[which(fast$weib.rescale.speed>0.5),]
fast_incorrect <- fast[which(fast$weib.rescale.speed<0.5),]
slow_correct <- slow[which(slow$weib.rescale.speed<0.5),]
slow_incorrect <- slow[which(slow$weib.rescale.speed>0.5),]

pdf("measure_comparison_alt.pdf", width=8, height=8)
plot.new()
par(mar=c(5,5,1,2))
plot.window(xlim=c(0,100), ylim=c(-.05,1))
#points(x=fast$percent.below.cut, y=fast$weib.rescale.speed, col="blue", pch=19)
#points(x=slow$percent.below.cut, y=slow$weib.rescale.speed, col="red", pch=19)
points(x=fast_correct$percent.below.cut, y=fast_correct$weib.rescale.speed, col="black", pch=19)
points(x=slow_correct$percent.below.cut, y=slow_correct$weib.rescale.speed, col="black", pch=19)
points(x=fast_incorrect$percent.below.cut, y=fast_incorrect$weib.rescale.speed, col="red", pch=17)
points(x=slow_incorrect$percent.below.cut, y=slow_incorrect$weib.rescale.speed, col="red", pch=17)
axis(1, at=c(0,10,20,30,40,50,60,70,80,90,100), labels=c(0,10,20,30,40,50,60,70,80,90,100))
axis(2, at=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0), labels=c(0,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,1.0))
#abline(v=50)
#segments(0,0,100,0, lty=3, col="black")
segments(50,0,50,1, lty=1, col="black")
segments(0,0.5,100,0.5, lty=1, col="black")
#abline(lm(fast$weib.rescale.speed~fast$totalyears), col="blue")
#abline(lm(slow$weib.rescale.speed~slow$totalyears), col="red")
#abline(lm(speeddata$weib.rescale.speed~speeddata$totalyears), col="black")
text(x=25, y=-.05, c("Slow Adoption (0)"), col="black")
text(x=75, y=-.05, c("Fast Adoption (1)"), col="black")
text(x=63, y=0.02, c("Bottle Deposits"), col="red", cex=0.8)
title(main="", xlab="Percentage of Adoptions Below 33% Cut Off", ylab="Speed Measure")
dev.off()

################## Speed changes over time (Figure 3) #########################

#jpeg("speed_time_scatter.jpg", height=8.5, width=11, units="in", res=1000)
pdf("speed_time_scatter.pdf", height=8.5, width=11)
scatter.smooth(speeddata$first, speeddata$weib.rescale.speed, ylab="Adoption Speed", xlab="Year of First Adoption", main="",pch=20, degree=2, evaluation=10, lpars=list(lwd=2, col="black"), axes=FALSE, font.lab=2)
abline(v=1965, col="black", lwd=2, lty=2)
text(1965, .98, "1965", cex=1, pos=4, col="black", font=2)
axis(1, at=c(1910, 1920, 1930, 1940, 1950, 1960, 1970, 1980, 1990, 2000, 2010), labels=c("1910", "1920", "1930","1940","1950","1960","1970","1980","1990","2000","2010"), font.lab=2, cex=2)
axis(2, at=c(0, .1,.2,.3,.4,.5,.6,.7,.8,.9,1), labels=c("0", "0.1", "0.2", "0.3","0.4","0.5","0.6","0.7","0.8","0.9","1"), font.lab=2, cex=2)
dev.off()


## T-test for average scope of policy adoption pre-1990 compared to post-1990
pre90 <- data[which(data$first<1990),]
post90 <- data[which(data$first>=1990),]
mean(pre90$totalstates) #30
sd(pre90$totalstates) #12.26
mean(post90$totalstates) #28
sd(post90$totalstates)#10.58
t.test(pre90$totalstates, post90$totalstates) # t(128) = 1.35, p = 0.18

## T-test for average scope of policy adoption pre-1965 and post-1965
pre65 <- data[which(data$first<1965),]
post65 <- data[which(data$first>=1965),]
mean(pre65$totalstates) #33
sd(pre65$totalstates) #13.40
mean(post65$totalstates) #28
sd(post65$totalstates)#11.30
t.test(pre65$totalstates, post65$totalstates) # t(128) = 1.29, p = 0.22


#################### Mean and SD from each MajorTopic (Figure 4) #################

library(gplots)
#jpeg("topic_means.jpg", height=8.5, width=11, units="in", res=600)
pdf("topic_means.pdf", height=8.5, width=11)
par(mar=c(6.5,5,1,2))
legends <- c("Fiscal","Civil Rights", "Health", "Agriculture", "Labor","Education", "Environment", "Energy", "Transportation", "Law", "Social Welfare", "Housing", "Commerce", "Trade", "Administration", "Resource Mgmt", "Local Gov't")
plotmeans(speeddata$weib.rescale.speed~factor(speeddata$MajorTopic), xlab="", ylab="Speed Measures", ylim=c(-0.05,1.05), minbar=0, maxbar=1, p=0.95, barcol="black", connect=FALSE, xaxt="n", pch=19)
axis(1, at=c(1:17), labels=FALSE)
text(c(1:17)-.4, par("usr")[3]-0.1, labels=legends, srt=45, pos=1, xpd=TRUE, cex=.9)
speed.mean <- mean(speeddata$weib.rescale.speed)
abline(h=speed.mean)
abline(h=0.5, lty=2)
dev.off()


#################### Pooled Weibull Analysis ############################
library(foreign)
library(eha)
library(lmtest)
library(survival)

pooled.raw <- read.dta("pooled_merged2.csv") #Includes covariates

pooled <- subset(pooled.raw, first>1945)  #Cut observations prior to 1946

## Put zeros in for missing salience and mip
pooled[is.na(pooled)] <- 0

## Need to seperately calculate the interaction term, so as to suppress the constituent term of complexity in the fixed effects models
pooled$interaction <- pooled$complexity_topic*pooled$mip

#Base model with single scale parameter (mip interaction)
base.pooled <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + mip*complexity_topic + cluster(policy), dist="weibull", data=pooled)
summary(base.pooled)
AIC(base.pooled)

#NYT Interaction
nyt.pooled <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + nyt*complexity_topic + cluster(policy), dist="weibull", data=pooled)
summary(nyt.pooled)
AIC(nyt.pooled)
lrtest(base.pooled, nyt.pooled) 

#Base plus FE for policy in regression equation (remove federal, healthinsurance, and complexity since they are policy invariant)
base.pooled.fe <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ nyt + mip + interaction + as.factor(policy) + cluster(policy) -1, dist="weibull", data=pooled)
summary(base.pooled.fe)
AIC(base.pooled.fe) 
lrtest(base.pooled, base.pooled.fe)

#Base plus all FE (remove federal, healthinsurance, and complexity since they are policy invariant)
base.pooled.allfe <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ nyt + mip + interaction + as.factor(policy) + strata(policy) + cluster(policy) - 1, dist="weibull", data=pooled, maxiter=1000)
summary(base.pooled.allfe)
AIC(base.pooled.allfe) 
lrtest(base.pooled, base.pooled.allfe)
lrtest(base.pooled.fe, base.pooled.allfe)

#Pull out non-crime policies
crimeless.pooled <- pooled[which(pooled$majortopic!=12),]

#Estimate model on non-crime policies
crimeless.pooled.out <- survreg(Surv(time=crimeless.pooled$duration, event=crimeless.pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + mip*complexity_topic + cluster(policy), dist="weibull", data=crimeless.pooled)
summary(crimeless.pooled.out)
AIC(crimeless.pooled.out)

crimeless.pooled.fe <- survreg(Surv(time=crimeless.pooled$duration, event=crimeless.pooled$adopt_indicator, type="right") ~ nyt + mip + complexity_topic + mip*complexity_topic + as.factor(policy) + strata(policy) + cluster(policy) - 1, dist="weibull", data=crimeless.pooled, maxiter=1000)
summary(crimeless.pooled.fe)

#Include measure of year
pooled$year_count <- pooled$year - min(pooled$first)
year.pooled.out <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + mip*complexity_topic + year_count + cluster(policy), dist="weibull", data=pooled)
summary(year.pooled.out)
AIC(year.pooled.out)
lrtest(base.pooled, year.pooled.out)

year.pooled.fe <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ nyt + mip + complexity_topic + mip*complexity_topic + as.factor(policy) + strata(policy) + year_count + cluster(policy) - 1, dist="weibull", data=pooled, maxiter=1000)
summary(year.pooled.fe)

###################### Distributional Comparison (Appendix 2) #########################

base.weibull <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + mip*complexity_topic + cluster(policy), dist="weibull", data=pooled)
summary(base.weibull)

base.exponential <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + mip*complexity_topic + cluster(policy), dist="exponential", data=pooled)
summary(base.exponential)
lrtest(base.weibull, base.exponential)

base.lognormal <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + mip*complexity_topic + cluster(policy), dist="lognormal", data=pooled)
summary(base.lognormal)
lrtest(base.weibull, base.lognormal)

base.loglogistic <- survreg(Surv(time=pooled$duration, event=pooled$adopt_indicator, type="right") ~ federal + healthinsurance +  nyt + mip + complexity_topic + mip*complexity_topic + cluster(policy), dist="loglogistic", data=pooled)
summary(base.loglogistic)
lrtest(base.weibull, base.loglogistic)


#################### Marginal Effects for Interaction (Figure 5) ########################

sal.complex.est <- base.pooled.allfe$coeff[2] + 1*base.pooled.allfe$coeff[3]
sal.complex.se <- sqrt(vcov(base.pooled.allfe)[2,2] + (1)^2*vcov(base.pooled.allfe)[3,3] + 2*(1)*vcov(base.pooled.allfe)[2,3])

sal.nocomplex.est <- base.pooled.allfe$coeff[2] + 0*base.pooled.allfe$coeff[3]
sal.nocomplex.se <- sqrt(vcov(base.pooled.allfe)[2,2] + (0)^2*vcov(base.pooled.allfe)[3,3] + 2*(0)*vcov(base.pooled.allfe)[2,3])

pdf("pooled_interaction_salience.pdf", height=6, width=6)
par(mar=c(3,5,2,2))
plot.new()
plot.window(xlim=c(0,2), ylim=c(-1,8))
points(x=0.5, y=sal.nocomplex.est, pch=20, cex=2)
lines(x=c(0.5,0.5), y=c((sal.nocomplex.est+(1.96*sal.nocomplex.se)), (sal.nocomplex.est-(1.96*sal.nocomplex.se))), lwd=1.5)
lines(x=c(0.45, 0.55), y=c((sal.nocomplex.est+(1.96*sal.nocomplex.se)), (sal.nocomplex.est+(1.96*sal.nocomplex.se))), lwd=1.5)
lines(x=c(0.45, 0.55), y=c((sal.nocomplex.est-(1.96*sal.nocomplex.se)), (sal.nocomplex.est-(1.96*sal.nocomplex.se))), lwd=1.5)
points(x=1.5, y=sal.complex.est, pch=20, cex=2)
lines(x=c(1.5,1.5), y=c((sal.complex.est+(1.96*sal.complex.se)), (sal.complex.est-(1.96*sal.complex.se))), lwd=1.5)
lines(x=c(1.45,1.55), y=c((sal.complex.est+(1.96*sal.complex.se)), (sal.complex.est+(1.96*sal.complex.se))), lwd=1.5)
lines(x=c(1.45,1.55), y=c((sal.complex.est-(1.96*sal.complex.se)), (sal.complex.est-(1.96*sal.complex.se))), lwd=1.5)
abline(h=0)
axis(1, at=c(0.5,1.5), label=c("Not Complex Policy", "Complex Policy"), lty=0)
axis(2, at=c(-1:8), label=c(-1:8))
title(main="", xlab="", ylab="Marginal Effect of Public Salience (MIP)")
dev.off()